********** Generates data summary statistics for survey 1 data, comparing the full sample to the sample of individuals who self-reported to be sure of their responses

* NOTE(review): the working-directory path is empty here; presumably it has to be filled in before running - confirm.
cd ""
clear 
set more off

* Loads the merged survey data (the file is looked up relative to the working directory set above).
use data_survey_merged

* Excludes observations identified as low quality by the survey provider:
*   - observations flagged in dynata_outliers are dropped entirely;
*   - observations flagged in dynata_outliers_w2 keep their row, but every w2_ variable is blanked,
*     so that the wave-1 observation is preserved if that one is OK for a given individual but wave 2 is not.
drop if inlist(dynata_outliers,1)
foreach var of varlist w2_* {
	* Branch on the storage type explicitly. Running both assignments under capture would also
	* hide unrelated errors (for example a missing dynata_outliers_w2), leaving wave-2 data unblanked.
	capture confirm string variable `var'
	if _rc==0 {
		* NOTE(review): string variables receive the literal "." rather than the empty string - kept as in the original.
		replace `var'="." if dynata_outliers_w2==1
	}
	else {
		replace `var'=. if dynata_outliers_w2==1
	}
}

* Replaces nonsensical GPA % outliers: values equal to 0 or above 1 are set to missing.
replace GPA_pct=. if GPA_pct==0 | GPA_pct>1	
	
* Containers for the summary table: one string label column plus numeric result columns,
* all starting out empty/missing. Rows are filled in later, one row per summarized variable.
generate measure = ""

* Any left-over column of the same name is dropped first, so the block can be re-run.
local stat_cols obs av sd min max sure_obs sure_av sure_sd diff_av sure_min sure_max
foreach stat of local stat_cols {
	capture drop r_`stat'
	capture generate r_`stat' = .
}
	
	

* Completion dates for both waves: the first 10 characters of each timestamp string are kept
* and parsed as month-day-year.
* NOTE(review): assumes the date part always fills exactly 10 characters (e.g. MM/DD/YYYY) - confirm.
foreach stem in end w2_end {
	generate `stem' = substr(`stem'_date, 1, 10)
	generate `stem'_d = date(`stem', "MDY")
}

* Number of days between completing wave 1 and completing wave 2.
generate t_recontact = w2_end_d - end_d
	
* Recodes categorical variables into 0-1 indicators.
* NOTE(review): an observation with a missing source value gets 0 (not missing) in every indicator - confirm this is intended.

* Country: codes 1-4 map to AU, CA, UK, US in that order.
local code = 0
foreach country in AU CA UK US {
	local ++code
	generate `country' = (cntry == `code')
}

* Profession (1=high school, 2=college, 3=employed, 4=unemployed, 5=other)
local code = 0
foreach status in high col emp unemp other {
	local ++code
	generate p_`status' = (profession == `code')
}

* Highest education (1: <high school, 2=high school, 3=bachelors, 4=masters, 5=other)
local code = 0
foreach level in nohigh high bc ma other {
	local ++code
	generate h_`level' = (highest_edu == `code')
}

* Flags respondents whose sure_pers answer equals 5.
generate sure_high = (sure_pers == 5)

	

	
* Indicator for answering both waves: 1 when wave2 equals 1, otherwise 0.
generate both_waves = (wave2 == 1)
tabulate both_waves

* Indicator for wave-2 data being present: 1 unless w2_BFI2_1 is system-missing.
generate both_waves_ok = (w2_BFI2_1 != .)
tabulate both_waves_ok
	
* Named variable lists used for reporting.
global dems "male age AU CA UK US  p_high p_col p_emp p_unemp p_other h_nohigh h_high h_bc h_ma h_other GPA_pct LS_gen q_mood_beginning_1 q_mood_end_1 sure_pers reliable pc t_recontact length_m"
global traits "extrav consci neurot agree open cognitive"
global facets "sociability assertiveness energy compassion respectfulness trust organization productiveness responsibility anxiety depression em_volatily curiosity aesthetic_sense imagination"
global prefs "GPS_risk GPS_time  GPS_pres_bias GPS_alt GPS_trust GPS_pos_rec GPS_neg_rec_self GPS_neg_rec_self2 GPS_neg_rec_other"
global wellb "LS_gen SWLS q_mood_beginning_1 q_mood_end_1"

* Shifts the relevant variables down by one so they are reported on 0-10 (instead of 1-11).
* The preference and well-being lists above, plus reliable and every cog_ item, are shifted,
* together with the wave-2 counterpart (w2_ prefix) of each of them.
foreach item of varlist $prefs $wellb reliable cog_* {
	foreach target in `item' w2_`item' {
		replace `target' = `target' - 1
	}
}

	
* Fills one row of the result columns per variable in $dems:
*   measure            - variable name
*   r_obs ... r_max    - N, mean, sd, min, max over the full sample
*   r_sure_*           - the same statistics restricted to sure_high==1
* The stored results are read as r() expressions rather than expanded as macros: when
* summarize finds no observations, r(mean), r(sd), r(min) and r(max) are not set, and a
* macro would expand to nothing and break the replace with a syntax error. As expressions
* they simply evaluate to missing.
local i=1	
foreach var of varlist $dems {
	replace measure="`var'" in `i'

	* full sample
	sum `var',d
	replace r_obs=r(N) in `i'
	replace r_av=r(mean) in `i'
	replace r_min=r(min) in `i'
	replace r_max=r(max) in `i'
	replace r_sd=r(sd) in `i'

	* respondents who reported being sure of their answers
	sum `var' if sure_high==1,d
	replace r_sure_obs=r(N) in `i'
	replace r_sure_av=r(mean) in `i'
	replace r_sure_min=r(min) in `i'
	replace r_sure_max=r(max) in `i'
	replace r_sure_sd=r(sd) in `i'

	local ++i
	}
	

*** determine statistically significant predictors of self-reporting to be sure that one's answers describe oneself accurately
* One bivariate regression of sure_high on each variable in $dems. Row i of names, coeff and pval
* receives the variable name, its slope coefficient and the p-value of that coefficient.
local i=1
capture drop names 
capture gen names=""
capture drop pval
capture gen pval=.
capture drop coeff
capture gen coeff=.


foreach var of varlist $dems  {

   replace names="`var'" in `i'
   * Estimated once per variable; r(table) row 1 holds the coefficients and row 4 the p-values,
   * column 1 is the regressor.
   reg sure_high `var' 
   replace pval=r(table)[4,1] in `i'
   replace coeff=r(table)[1,1] in `i'

   local ++i
}

* difference in sample means/proportions for the reliable and full sample
replace r_diff_av=r_sure_av-r_av

order r_diff_av,last
*** Below yields the numbers for Table A.2		
br measure r_*	pval coeff



************* Repeat above for measured constructs
* Clears the previous table and recreates empty result columns (no min/max columns this time).
drop measure r_* pval coeff

generate measure = ""

local stat_cols obs av sd sure_obs sure_av sure_sd diff_av
foreach stat of local stat_cols {
	capture drop r_`stat'
	capture generate r_`stat' = .
}
	

* Fills one row of the result columns per measured construct:
*   measure          - variable name
*   r_obs r_av r_sd  - N, mean, sd over the full sample
*   r_sure_*         - the same statistics restricted to sure_high==1
* The stored results are read as r() expressions rather than expanded as macros: when
* summarize finds no observations, r(mean) and r(sd) are not set, and a macro would expand
* to nothing and break the replace with a syntax error. As expressions they evaluate to missing.
local i=1	
foreach var of varlist extrav  consci   neurot agree   open   GPS_risk GPS_time  GPS_pres_bias GPS_alt GPS_trust GPS_pos_rec 	GPS_neg_rec_self GPS_neg_rec_self2 GPS_neg_rec_other      LS_gen SWLS cognitive  {
	replace measure="`var'" in `i'

	* full sample
	sum `var',d
	replace r_obs=r(N) in `i'
	replace r_av=r(mean) in `i'
	replace r_sd=r(sd) in `i'

	* respondents who reported being sure of their answers
	sum `var' if sure_high==1,d
	replace r_sure_obs=r(N) in `i'
	replace r_sure_av=r(mean) in `i'
	replace r_sure_sd=r(sd) in `i'

	local ++i
	}
	

*** test whether each measured construct differs for respondents who self-report being sure of their answers
* Each construct is regressed on the 0-1 indicator sure_high, so the stored coefficient is the
* difference in the construct's mean between sure_high==1 and sure_high==0, and pval is its p-value.
capture drop names 
capture generate names = ""
capture drop pval
capture generate pval = .
capture drop coeff
capture generate coeff = .

* Temporary matrix holding the coefficient table of the most recent regression.
tempname est
local row = 1
foreach construct of varlist extrav consci neurot agree open GPS_risk GPS_time GPS_pres_bias GPS_alt GPS_trust GPS_pos_rec GPS_neg_rec_self GPS_neg_rec_self2 GPS_neg_rec_other LS_gen SWLS cognitive {

	replace names = "`construct'" in `row'

	regress `construct' sure_high
	matrix `est' = r(table)

	* row 4 = p-value, row 1 = coefficient; column 1 = sure_high
	replace pval = `est'[4,1] in `row'
	replace coeff = `est'[1,1] in `row'

	local ++row
}

* Difference in sample means/proportions between the sure subsample and the full sample
replace r_diff_av = r_sure_av - r_av

*** Below yields the numbers for Table A.4
browse measure r_* pval coeff
